/*=======================================================================================
	IncomeProcess.do
	
		Uses the Personal Finance Website (PFW) data to estimate the income process
		for the buffer stock model
		
	Author: Lorenz Kueng, June 2017
=========================================================================================*/

* Open a fresh named log for this step; -capture- swallows the error when no log of that name is open.
capture log close PFW_10
log using "$homedir/log-files/PFW_10_$date.log", name(PFW_10) text replace

* Load PFW_quarterly.dta, replacing whatever is in memory.
use "$homedir/data/stata/PFW_quarterly.dta", clear

	* Copy Quarter into a %tq-formatted time variable and declare the userid-by-date panel.
	gen    date = Quarter
	format %tq date
	xtset  userid date


	** Create control variables

		* Controls that go through -encode- (which requires string input): build a numeric
		* copy named <var>_, fill system-missing values with the user's modal value
		* (maxmode: the largest mode is used when there are ties), and code whatever is
		* still missing as 999.
		local encoded_controls residentialstatus marriagestatus
		foreach ctrl of local encoded_controls {
			display _newline(1) "`ctrl'"
			capture drop `ctrl'_
			encode `ctrl', generate(`ctrl'_)
			bysort userid (date): egen _temp = mode(`ctrl'_), maxmode
			replace `ctrl'_ = _temp if _temp!=. & `ctrl'_==.
			replace `ctrl'_ = 999   if `ctrl'_==.
			drop _temp
		}

		* Controls that are copied as they are: same fill-by-mode and 999 treatment as above.
		local copied_controls age edu profession homezipcode
		foreach ctrl of local copied_controls {
			display _newline(1) "`ctrl'"
			capture drop `ctrl'_
			gen `ctrl'_ = `ctrl'
			bysort userid (date): egen _temp = mode(`ctrl'_), maxmode
			replace `ctrl'_ = _temp if _temp!=. & `ctrl'_==.
			replace `ctrl'_ = 999   if `ctrl'_==.
			drop _temp
		}

		* Quarter-on-quarter change in incomeQ (uses the panel declared with -xtset-).
		* Wherever the difference is missing, substitute the user's own mean change.
		capture drop _temp
		generate D_incomeQ = D.incomeQ
		bysort userid (Quarter): egen _temp = mean(D_incomeQ)
		replace D_incomeQ = _temp if missing(D_incomeQ)
		drop _temp*

		* Winsorized copies (suffix W) via -winsor- with p(0.01), followed by a detailed
		* summary of each raw/winsorized pair.
		local to_winsorize incomeQ D_incomeQ incomeY liquid_asset1
		foreach v of local to_winsorize {
			winsor `v', p(0.01) generate(`v'W)
			summarize `v' `v'W, detail
		}

	** Create permanent income: Total expenditures, excluding uncategorized transactions (ie only nondurables, services, durables)

		* Per-user mean of quarterly total spending, nominal and real.
		by userid, sort: egen totexp_annual_nominal = mean(totexp1_nominal)
		by userid, sort: egen totexp_annual         = mean(totexp1)

		* Multiply by 4 to annualize and flip the sign so that expenditures are positive.
		replace totexp_annual_nominal = -4 * totexp_annual_nominal
		replace totexp_annual         = -4 * totexp_annual

		* Winsorized copies (suffix W) via -winsor- with p(0.01).
		winsor totexp_annual_nominal, generate(totexp_annual_nominalW) p(0.01)
		winsor totexp_annual,         generate(totexp_annualW)         p(0.01)

		* Real annual spending divided by the equivalence scale, then quintiles of it
		* computed separately within each year-by-Alaska cell.
		gen  totexp_annual_percap = totexp_annual / equivalence
		egen totexpY_quantile     = xtile(totexp_annual_percap), n(5) by(year Alaska)


	** Keep labor income

		sum cat30 cat3001 cat3003 cat3004 cat3005 cat3006 cat3007  cat50 cat5003, d // summarize income variables

		* For each income category: take the 99th percentile of the nonzero observations
		* (computed before negatives are zeroed), set negative values to zero, and cap
		* values above that percentile at it.
		*
		* The percentile is held in a temporary scalar rather than expanded through a macro:
		*   - it keeps full double precision, and
		*   - if a category has no nonzero observations r(p99) is missing, which here simply
		*     leaves the variable untouched instead of producing a syntax error.
		*
		* Missing values compare as larger than every number in Stata, so the cap must
		* exclude them explicitly; otherwise missing income would be overwritten with p99.
		tempname p99cap
		foreach cat in 3001 3004 30 { // 3003 3005 3006 3007 50 5003
			sum     cat`cat'            if cat`cat'!=0, d
			scalar  `p99cap' = r(p99)
			replace cat`cat' = 0                  if cat`cat'<0
			replace cat`cat' = scalar(`p99cap')   if cat`cat'>scalar(`p99cap') & !missing(cat`cat')
		}
		sum cat3001 cat3004 cat30, d


	** Collapse to annual frequency

		* Restrict the sample to 2011q1-2013q4 (tq() is the documented quarterly-literal function).
		drop if date<tq(2011q1) | date>tq(2013q4)

		* Keep users aged 25-59 in the raw age variable.
		* NOTE(review): a missing age compares as larger than 59 in Stata, so rows with
		* missing raw age are dropped here as well, even where age_ was filled in -- confirm
		* that this is intended.
		drop if age<25 | age>59

		* Variables carried forward; the same list fixes both the kept set and the column order.
		local keepvars userid date year cat3001 cat3004 cat30 totexpY_quantile totexp_annual_percap ///
			Alaska family_sizeImp age_ edu_ marriagestatus_ residentialstatus_ profession_ ///
			homezipcode_ male children adults equivalence
		keep  `keepvars'
		order `keepvars'

		* (first) takes the value from the first observation of each userid-year group in
		* the current sort order. Earlier -egen- and -winsor- calls may have reordered the data,
		* so sort explicitly: "first" is then always the earliest quarter of the year.
		sort userid year date

		* One row per userid-year: annual sum of cat3001, first-quarter value of everything else.
		collapse (sum) cat3001 /*cat3004 cat30*/ ///
			(first) totexpY_quantile totexp_annual_percap Alaska family_sizeImp age_ edu_ ///
			marriagestatus_ residentialstatus_ profession_ homezipcode_ male children adults ///
			equivalence date, by(userid year)


	** Calculate annual standard deviation of salary for each household

		* Salary = cat3001 (summed within userid-year by the collapse above) divided by the
		* equivalence scale, plus its natural log (missing wherever salary is not positive).
		generate salary    = cat3001/equivalence
		generate ln_salary = ln(salary)

		* Winsorized copies (suffix W) via -winsor- with p(0.05), then the per-user standard
		* deviation of each winsorized series.
		foreach v of varlist salary ln_salary {
			winsor `v', generate(`v'W) p(0.05)
			egen   SD_`v' = sd(`v'W), by(userid)
		}

		* Re-declare the panel on the annual time variable (one row per userid-year after the collapse).
		xtset userid year

		* Fixed-effects regression of winsorized log salary on its own lag and the categorical controls.
		xtreg ln_salaryW L.ln_salaryW  i.Alaska i.family_sizeImp i.age_ i.edu_ i.marriagestatus_ i.residentialstatus_ i.profession_ i.homezipcode_ i.male i.children i.adults i.totexpY_quantile, fe

		* Residual from the regression. The documented -predict- statistics after -xtreg, fe-
		* are xb, stdp, ue, xbu, u and e; -residuals- is a -regress- statistic and is not
		* among them. -e- is the idiosyncratic error component. Because the fixed effect u_i
		* is constant within user, the per-user SD below is the same as it would be for -ue-.
		cap drop ln_salaryW_resid
		predict  ln_salaryW_resid, e

		* Per-user standard deviation of the residual.
		cap drop SD_ln_salaryW_resid
		egen     SD_ln_salaryW_resid = sd(ln_salaryW_resid), by(userid)

		* Summarize the per-user residual SD within each expenditure quintile.
		bysort totexpY_quantile: sum SD_ln_salaryW_resid

		* calculate quarterly SD of transitory & permanent shock (assuming transitory and persistent shock have same SD = 14 SDs)
		* For each expenditure quintile: mean of the per-user residual SD, printed once
		* divided by sqrt(14) and once divided by sqrt(14) and doubled.
		foreach quintile of numlist 1/5 {
			summarize SD_ln_salaryW_resid if totexpY_quantile==`quintile'
			display `r(mean)'/sqrt(14)   // SD of transitory and persistent shock
			display `r(mean)'/sqrt(14)*2 // SD of quarterly shock
		}

* Close the named log opened at the top of this do-file.
log close PFW_10

